{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 实验题目：BLAST\n",
    "## 实验内容\n",
    "* 通过本地版本或在线版的方式来寻找inputFasta文件中相似序列，保存为xml格式，命名为OutBlast.xml\n",
    "* 解析BLAST运行结果，提取所有e-value小于0.01的结果，保存到Results.txt文件中\n",
    "\n",
    "### Import the packages"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# @Date:   2019-04-29T11:11:49+08:00\n",
    "# @Email:  1730416009@stu.suda.edu.cn\n",
    "# @Project: M_BioPy\n",
    "# @Last modified time: 2019-04-29T11:20:04+08:00\n",
    "\n",
    "# Import packages\n",
    "from Bio.Blast import NCBIWWW\n",
    "from Bio.Blast import NCBIXML"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Define a function that uses Biopython to run BLAST and collect the results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def getBlastInfo(fasta_string, blastType, seqType):\n",
    "    \"\"\"Run an online BLAST search, save the XML, and return a record iterator.\n",
    "\n",
    "    The raw result returned by NCBIWWW.qblast is written to OutBlast.xml in\n",
    "    the current working directory, then parsed from that file.\n",
    "\n",
    "    Args:\n",
    "        fasta_string: Query passed to NCBIWWW.qblast as the sequence argument.\n",
    "        blastType: BLAST program name passed to qblast (main() uses \"blastp\").\n",
    "        seqType: Database name passed to qblast (main() uses \"pdbaa\").\n",
    "\n",
    "    Returns:\n",
    "        A generator yielding the records that NCBIXML.parse reads from\n",
    "        OutBlast.xml. The file is opened when iteration starts and closed\n",
    "        once the generator is exhausted or closed.\n",
    "    \"\"\"\n",
    "    result_handle = NCBIWWW.qblast(blastType, seqType, fasta_string)\n",
    "    try:\n",
    "        with open(\"OutBlast.xml\", \"w\") as save_file:\n",
    "            save_file.write(result_handle.read())\n",
    "    finally:\n",
    "        # Release the qblast handle even if writing the XML file fails.\n",
    "        result_handle.close()\n",
    "\n",
    "    def _iter_records():\n",
    "        # NCBIXML.parse reads lazily, so the file must stay open while records\n",
    "        # are consumed; the with-block closes it afterwards.\n",
    "        with open(\"OutBlast.xml\") as xml_handle:\n",
    "            yield from NCBIXML.parse(xml_handle)\n",
    "\n",
    "    return _iter_records()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Set the e-value cutoff, filter the hits, and write the output in main()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def main():\n",
    "    \"\"\"Run BLAST on the contents of 'inputFasta' and save significant hits.\n",
    "\n",
    "    Reads the file 'inputFasta', passes its text to getBlastInfo with the\n",
    "    arguments \"blastp\" and \"pdbaa\", and writes one entry to 'Results.txt' for\n",
    "    every HSP whose expect value is below E_VALUE_THRESH. The query, match\n",
    "    and subject strings are truncated to their first 75 characters.\n",
    "    \"\"\"\n",
    "    E_VALUE_THRESH = 0.01\n",
    "    # Read the query through a context manager so the input file is closed.\n",
    "    with open('inputFasta', 'rt') as inputFile:\n",
    "        fasta_string = inputFile.read()\n",
    "    blast_records = getBlastInfo(fasta_string, \"blastp\", \"pdbaa\")\n",
    "    with open('Results.txt', 'wt') as outputFile:\n",
    "        for record in blast_records:\n",
    "            for alignment in record.alignments:\n",
    "                for hsp in alignment.hsps:\n",
    "                    if hsp.expect < E_VALUE_THRESH:\n",
    "                        outputFile.write('****Alignment****\\n')\n",
    "                        outputFile.write('sequence:%s\\n' % (alignment.title))\n",
    "                        # The colon belongs after the label, before the value.\n",
    "                        outputFile.write('length:%s\\n' % (alignment.length))\n",
    "                        outputFile.write('e value:%s\\n' % (hsp.expect))\n",
    "                        outputFile.write('%s...\\n' % (hsp.query[0:75]))\n",
    "                        outputFile.write('%s...\\n' % (hsp.match[0:75]))\n",
    "                        outputFile.write('%s...\\n' % (hsp.sbjct[0:75]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Run main() when the code is executed directly"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Call main() only when this code is the entry point (__name__ is \"__main__\").\n",
    "if __name__ == \"__main__\":\n",
    "    main()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
